# Replication Code

Sys.setlocale("LC_ALL", 'en_US.UTF-8')
library(plyr)
library(dplyr) 
library(lmtest)
library(MASS)
library(stargazer)
library(AER)
library(Zelig)
library(pastecs)
library(xtable)
library(lubridate)
library(RgoogleMaps)
library(RColorBrewer)
library(scales)
library(wordcloud)

datadir = '~/Dropbox/China Protest Paper/Production Materials/Replication/'
figdir = '~/Dropbox/China Protest Paper/Production Materials/Figures/'

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

######################################################################
# Figure 2: The Life Cycle of Threats and Protest in China           #
######################################################################

df$day = as.integer(yday(df$date))

#### Protest Plot

un = as.data.frame(c(1:365));colnames(un)="day";head(un)
for(i in 1:nrow(un)){
  un$pro[i] = mean(na.omit(df$protests[df$day==un$day[i]]))
  }

ymax = 8
par(mar=c(6,5,10,5),xpd=T)
plot(un$pro,type="l",ylab=paste("Protests",sep=' '),ylim=c(0,ymax),bty="n",xlab=NA,xaxt="n",col="navy",lwd=2,cex.lab=1.2) # ylim=c(0,.3),axes=F,,col.lab="navy"
lablist = c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
axis(1, at=seq(1,365,by=31), -20, labels=lablist,cex.axis=1.2)
segments(1,mean(na.omit(un$pro)+2*sd(na.omit(un$pro))),365,mean(na.omit(un$pro)+2*sd(na.omit(un$pro))),col="navy",lty="dashed",lwd=2)
segments(1,mean(na.omit(un$pro)+1*sd(na.omit(un$pro))),365,mean(na.omit(un$pro)+1*sd(na.omit(un$pro))),col="cadetblue",lty="dashed",lwd=2)
text(25,ymax,adj=0,"Lunar New Year",xpd=T,srt=90,col="darkgray",cex=1.2)
text(67,ymax,adj=0,"Tibetan Rebellion",xpd=T,srt=90,col="orange",cex=1.2) # march 3
text(93,ymax,adj=0,"Serf Emancipation Day",xpd=T,srt=90,col="orange",cex=1.2) # march 28
text(160,ymax,adj=0,"Tiananmen",xpd=T,srt=90,col="darkgray",cex=1.2)
text(145,ymax,adj=0,'Tibetan "Liberation"',xpd=T,srt=90,col="orange",cex=1.2) # may 23
text(193,ymax,adj=0,"Xinjiang Uprising",xpd=T,srt=90,col="orange",cex=1.2) # july 5
text(230,ymax,adj=0,"Leadership Retreat",xpd=T,srt=90,col="darkgray",cex=1.2)
text(305,ymax,adj=0,"Party Congress 2017",xpd=T,srt=90,col="darkgray",cex=1.2) # oct 18
text(320,ymax,adj=0,"Party Congress 2012",xpd=T,srt=90,col="darkgray",cex=1.2) # nov 8
text(335,ymax,adj=0,"Democracy Wall",xpd=T,srt=90,col="darkgray",cex=1.2)
text(342,ymax,adj=0,"Constitution Day",xpd=T,srt=90,col="darkgray",cex=1.2) # dec 4
text(348,ymax,adj=0,"Charter 08",xpd=T,srt=90,col="darkgray",cex=1.2) # dec 10
text(356,ymax,adj=0,"NPC Direct Election",xpd=T,srt=90,col="darkgray",cex=1.2) # dec 19
filename = "ProtestPlotSumIVgrayV2.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

#### Threat Plot           

un = as.data.frame(c(1:365));colnames(un)="day";head(un)
for(i in 1:nrow(un)){
  un$threats.topic[i] = sum(na.omit(df$threats.topic[df$day==un$day[i]])) # & t$year %in% badyear==F])
  un$social.stability[i] = sum(na.omit(df$social.stability[df$day==un$day[i]])) # & t$year %in% badyear==F])
  un$weiwen[i] = sum(na.omit(df$weiwen[df$day==un$day[i]])) # & t$year %in% badyear==F])
  un$wending[i] = sum(na.omit(df$wending[df$day==un$day[i]])) # & t$year %in% badyear==F])
  un$hexie[i] = sum(na.omit(df$hexie[df$day==un$day[i]])) # & t$year %in% badyear==F])
  un$jingcha[i] = sum(na.omit(df$jingcha[df$day==un$day[i]])) # & t$year %in% badyear==F])
}
un$threats.topic[un$threats.topic==0] = NA
un$social.stability[un$social.stability==0] = NA

ymax = 260
par(mar=c(6,5,10,5),xpd=T)
plot(un$jingcha,ylab=paste("Terms",sep=' '),ylim=c(0,ymax),bty="n",xlab=NA,xaxt="n",col="navy",lwd=2,cex.lab=1.2,type="l") # ylim=c(0,.3),axes=F,col.lab="navy"
lablist = c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
axis(1, at=seq(1,365,by=31), -20, labels=lablist,cex.axis=1.2)
points(un$wending,col="red",pch=2,type="l")
points(un$hexie,col="orange",pch=3,type="l")
points(un$weiwen,col="green",pch=3,type="l")
legend("bottom",inset=c(0,-.6),lwd=3,cex=1.2,horiz=T,col=c("green","navy","red","orange"),c("Social Stability","Police","Stability","Harmony"),bty="n")

text(25,ymax,adj=0,"Lunar New Year",xpd=T,srt=90,col="darkgrey",cex=1.2)
text(67,ymax,adj=0,"Tibetan Rebellion",xpd=T,srt=90,col="orange",cex=1.2) # march 3
text(93,ymax,adj=0,"Serf Emancipation Day",xpd=T,srt=90,col="orange",cex=1.2) # march 28
text(160,ymax,adj=0,"Tiananmen",xpd=T,srt=90,col="darkgrey",cex=1.2)
text(145,ymax,adj=0,'Tibetan "Liberation"',xpd=T,srt=90,col="orange",cex=1.2) # may 23
text(193,ymax,adj=0,"Xinjiang Uprising",xpd=T,srt=90,col="orange",cex=1.2) # july 5
text(230,ymax,adj=0,"Leadership Retreat",xpd=T,srt=90,col="darkgrey",cex=1.2)
text(305,ymax,adj=0,"Party Congress 2017",xpd=T,srt=90,col="darkgrey",cex=1.2) # oct 18
text(320,ymax,adj=0,"Party Congress 2012",xpd=T,srt=90,col="darkgrey",cex=1.2) # nov 8
text(335,ymax,adj=0,"Democracy Wall",xpd=T,srt=90,col="darkgrey",cex=1.2)
text(342,ymax,adj=0,"Constitution Day",xpd=T,srt=90,col="darkgrey",cex=1.2) # dec 4
text(348,ymax,adj=0,"Charter 08",xpd=T,srt=90,col="darkgrey",cex=1.2) # dec 10
text(356,ymax,adj=0,"NPC Direct Election",xpd=T,srt=90,col="darkgrey",cex=1.2) # dec 19

par(new=T,xpd=T)
plot(un$threats.topic,col="black",pch=3,bty="n",axes=F,ylab=NA,xlab=NA,ylim=c(0,2),lwd=1)
par(new=T,xpd=T)
plot(un$social.stability,col="purple",pch=4,bty="n",axes=F,ylab=NA,xlab=NA,ylim=c(0,2),lty=1)
axis(4)
mtext("Articles",cex=1.2,side=4,line=3,col="black")
legend("bottom",inset=c(0,-.8),lwd=3,cex=1.2,horiz=T,pch=c(3,4),col=c("black","purple"),c("Social Stability","Law Enforcement"),bty="n")

filename = "AllSemanticData.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)


######################################################################
# Figure 3: Rates of Propaganda-Based Threats                        #
######################################################################

##### Topics 

a = mean(df$social.stability[df$awindow3==1],na.rm=T)
b = mean(df$social.stability[df$awindow3==0],na.rm=T)
c = mean(df$threats.topic[df$awindow3==1],na.rm=T)
d = mean(df$threats.topic[df$awindow3==0],na.rm=T) 

counts <- c(a*100,b*100,c*100,d*100)
mylab  = c("Stability | Separatist Window",
           "Stability | Non Window",
           "Law Enforcement | Separatist Window",
           "Law Enforcement | Non Window"
           )

par(las=2) 
par(mar=c(5,23,4,2)) 
barplot(counts, names.arg=mylab,horiz=T,col="coral4",cex=1.5,cex.axis=1.7)
filename = "ConditionalProbability4.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

##### References

a = mean(df$wending[df$awindow3==1],na.rm=T)
b = mean(df$wending[df$awindow3==0],na.rm=T)
c = mean(df$hexie[df$awindow3==1],na.rm=T)
d = mean(df$hexie[df$awindow3==0],na.rm=T) 

counts <- c(a,b,c,d)
mylab  = c("'Stability' | Separatist Window",
           "'Stability' | Non Window",
           "'Harmony' | Separatist Window",
           "'Harmony' | Non Window"
           )

par(las=2)
par(mar=c(5,18,4,2)) 
barplot(counts, names.arg=mylab,horiz=T,col="coral4",cex=1.5,cex.axis=1.7)
filename = "ConditionalProbability5.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)


######################################################################
# Table 3: When Autocrats Issue Propaganda-Based Threats (Topics)    #
######################################################################

df$awin = df$anniv
df$dwin = df$democratic
df$pwin = df$political
df$cwin = df$cultural
m0s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m0s)
df$awin = df$awindow1
df$dwin = df$dwindow1
df$pwin = df$pwindow1
df$cwin = df$cwindow1
m1s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m1s)
df$awin = df$awindow2
df$dwin = df$dwindow2
df$pwin = df$pwindow2
df$cwin = df$cwindow2
m2s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m2s)
df$awin = df$awindow3
df$dwin = df$dwindow3
df$pwin = df$pwindow3
df$cwin = df$cwindow3
m3s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m3s)
df$awin = df$awindow4
df$dwin = df$dwindow4
df$pwin = df$pwindow4
df$cwin = df$cwindow4
m4s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m4s)
df$awin = df$awindow5
df$dwin = df$dwindow5
df$pwin = df$pwindow5
df$cwin = df$cwindow5
m5s = glm(social.stability ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m5s)

df$awin = df$anniv
df$dwin = df$democratic
df$pwin = df$political
df$cwin = df$cultural
m0t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m0s)
df$awin = df$awindow1
df$dwin = df$dwindow1
df$pwin = df$pwindow1
df$cwin = df$cwindow1
m1t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m1t)
df$awin = df$awindow2
df$dwin = df$dwindow2
df$pwin = df$pwindow2
df$cwin = df$cwindow2
m2t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m2t)
df$awin = df$awindow3
df$dwin = df$dwindow3
df$pwin = df$pwindow3
df$cwin = df$cwindow3
m3t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m3t)
df$awin = df$awindow4
df$dwin = df$dwindow4
df$pwin = df$pwindow4
df$cwin = df$cwindow4
m4t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m4t)
df$awin = df$awindow5
df$dwin = df$dwindow5
df$pwin = df$pwindow5
df$cwin = df$cwindow5
m5t = glm(threats.topic ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),family="binomial",data=df);summary(m5t)


stargazer(m0s,m1s,m2s,m3s,m4s,m5s,
          m0t,m1t,m2t,m3t,m4t,m5t,
          type="latex",
          title="When Autocrats Issue Propaganda-Based Threats: Topics",
          label="Table: Threat Assignment Mechanism 1 FE",
          font.size = "footnotesize",
          omit=c("year"),
          add.lines=list(c("Year Fixed Effects",rep("Yes",12))),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$"),
          dep.var.labels = c("Social Stability Topic","Law Enforcement Topic"),
          column.labels = c(rep(c("0 Day","1 Day","2 Day","3 Day","4 Day","5 Day"),2)),
          notes = c("$N$ Day column names give +/-$N$ size of anniversary windows."))


#######################################################################
# Table 4: When Autocrats Issue Propaganda-Based Threats (References) #
#######################################################################

df$awin = df$anniv
df$dwin = df$democratic
df$pwin = df$political
df$cwin = df$cultural
m0s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m0s)
df$awin = df$awindow1
df$dwin = df$dwindow1
df$pwin = df$pwindow1
df$cwin = df$cwindow1
m1s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m1s)
df$awin = df$awindow2
df$dwin = df$dwindow2
df$pwin = df$pwindow2
df$cwin = df$cwindow2
m2s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m2s)
df$awin = df$awindow3
df$dwin = df$dwindow3
df$pwin = df$pwindow3
df$cwin = df$cwindow3
m3s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m3s)
df$awin = df$awindow4
df$dwin = df$dwindow4
df$pwin = df$pwindow4
df$cwin = df$cwindow4
m4s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m4s)
df$awin = df$awindow5
df$dwin = df$dwindow5
df$pwin = df$pwindow5
df$cwin = df$cwindow5
m5s = glm.nb(wending ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m5s)

df$awin = df$anniv
df$dwin = df$democratic
df$pwin = df$political
df$cwin = df$cultural
m0t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m0s)
df$awin = df$awindow1
df$dwin = df$dwindow1
df$pwin = df$pwindow1
df$cwin = df$cwindow1
m1t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m1t)
df$awin = df$awindow2
df$dwin = df$dwindow2
df$pwin = df$pwindow2
df$cwin = df$cwindow2
m2t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m2t)
df$awin = df$awindow3
df$dwin = df$dwindow3
df$pwin = df$pwindow3
df$cwin = df$cwindow3
m3t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m3t)
df$awin = df$awindow4
df$dwin = df$dwindow4
df$pwin = df$pwindow4
df$cwin = df$cwindow4
m4t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m4t)
df$awin = df$awindow5
df$dwin = df$dwindow5
df$pwin = df$pwindow5
df$cwin = df$cwindow5
m5t = glm.nb(hexie ~ awin + dwin + pwin + cwin + protests.l1 + as.factor(year),data=df);summary(m5t)


stargazer(m0s,m1s,m2s,m3s,m4s,m5s,
          m0t,m1t,m2t,m3t,m4t,m5t,
          type="latex",
          title="When Autocrats Issue Propaganda-Based Threats: References",
          label="Table: Threat Assignment Mechanism 1 FE Count",
          font.size = "footnotesize",
          omit=c("year"),
          add.lines=list(c("Year Fixed Effects",rep("Yes",12))),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$"),
          dep.var.labels = c("Stability","Harmony"),
          column.labels = c(rep(c("0 Day","1 Day","2 Day","3 Day","4 Day","5 Day"),2)),
          notes = c("$N$ Day column names give +/-$N$ size of anniversary windows."))

#######################################################################
# Figure 4: Map of China                                              #
#######################################################################

# follow these instructions to install fftw via terminal: http://trucvietle.me/r/tutorial/2016/12/18/cartogram-plotting-using-r.html
library(devtools)
#install_github('omegahat/Rcartogram')
#install_github('chrisbrunsdon/getcartr', subdir='getcartr')
library(Rcartogram)
library(getcartr)
library(ggplot2)
library(maptools)
library(mapdata)
library(foreign)
library(ggplot2)
library(knitr)
library(ggvis)
library(rgdal)
library(raster)
library(sp)
library(rgeos)

getSmallPolys <- function(poly, minarea=0.01) {
  # Get the areas
  areas <- lapply(poly@polygons, 
                  function(x) sapply(x@Polygons, function(y) y@area))
  
  # Quick summary of the areas
  print(quantile(unlist(areas)))
  
  # Which are the big polygons?
  bigpolys <- lapply(areas, function(x) which(x > minarea))
  length(unlist(bigpolys))
  
  # Get only the big polygons and extract them
  for(i in 1:length(bigpolys)){
    if(length(bigpolys[[i]]) >= 1 && bigpolys[[i]] >= 1){
      poly@polygons[[i]]@Polygons <- poly@polygons[[i]]@Polygons[bigpolys[[i]]]
      poly@polygons[[i]]@plotOrder <- 1:length(poly@polygons[[i]]@Polygons)
    }
  }
  return(poly)
}

ch0 <-getData('GADM', country='CHN', level=1)  ##Get the province shapefile for China
# Use level1 as index
ChinaLevel1Data <- ch0@data
ChinaLevel1Data$id <- ChinaLevel1Data$NL_NAME_1
# Fortify the data (polygon map as dataframe)
ChinaLevel1dF <- fortify(ch0, region = "NL_NAME_1")


ch <-getData('GADM', country='CHN', level=1)  ##Get the province shapefile for China
min<-subset(ch,NAME_1=="Xizang" | NAME_1=="Xinjiang Uygur")
min2<-subset(ch,NAME_1=="Beijing" | NAME_1=="Sichuan" | NAME_1=="Qinghai" | NAME_1=="Gansu" | NAME_1=="Yunnan" | NAME_1=="Ningxia Hui" | NAME_1=="Nei Mongol" )
ch <- gSimplify(ch, tol=0.01, topologyPreserve=TRUE)
ch <- getSmallPolys(ch) 
min <- gSimplify(min, tol=0.01, topologyPreserve=TRUE)
min <- getSmallPolys(min) 
min2 <- gSimplify(min2, tol=0.01, topologyPreserve=TRUE)
min2 <- getSmallPolys(min2) 


# Combine
Level1Centers <- ddply(ChinaLevel1dF, .(id), summarize, clat = mean(lat), clong = mean(long))
# Merge results in one data frame
ChinaLevel1Data <- merge(ChinaLevel1Data, Level1Centers, all = TRUE)

# Alter label positions so they don't overlap
adj = 1
ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Beijing"] = ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Beijing"] + adj
ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Liaoning"] = ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Liaoning"] + 2*adj
ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Tianjin"] = ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Tianjin"] - adj
ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Jiangxi"] = ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Jiangxi"] + adj
ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Guangxi"] = ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Guangxi"] + 2*adj
ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Ningxia Hui"] = ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Ningxia Hui"] + 2*adj
ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Shaanxi"] = ChinaLevel1Data$clat[ChinaLevel1Data$NAME_1=="Shaanxi"] - adj
ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Shaanxi"] = ChinaLevel1Data$clong[ChinaLevel1Data$NAME_1=="Shaanxi"] + adj
ChinaLevel1Data$NAME_1[ChinaLevel1Data$NAME_1=="Ningxia Hui"] = "Ningxia"
ChinaLevel1Data$NAME_1[ChinaLevel1Data$NAME_1=="Xinjiang Uygur"] = "Xinjiang"
ChinaLevel1Data$NAME_1[ChinaLevel1Data$NAME_1=="Xizang"] = "Tibet"
ChinaLevel1Data$NAME_1[ChinaLevel1Data$NAME_1=="Nei Mongol"] = "Inner Mongolia"

# Plot
ch                                                                  %>%
  ggplot(aes(x = long, y = lat, group = group))                      +
  geom_polygon(fill = "beige", color = "grey")                       +
  geom_polygon(data = min, aes(x=long, y=lat,group=group),fill="coral3",color="grey") + 
  geom_polygon(data = min2, aes(x=long, y=lat,group=group),fill="cadetblue",color="grey") + 
  geom_text(data = ChinaLevel1Data, aes(x = clong, y = clat, label = NAME_1, size = 2, group = NULL)) + 
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.position="none",
        panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())

# Save 
filename="chinaMap.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

##############################################################
# Figure 5: Surveys                                          #
##############################################################

##### Nationally Representative Sample

df = read.csv(paste(datadir,'Survey1000ResponsesFormatted.csv',sep=""),header=T,stringsAsFactors=F);names(df)
df = df[3:nrow(df),]
df$Duration..in.seconds. = as.integer(as.vector(df$Duration..in.seconds.))
colnames(df)[colnames(df)=="Q33"] = "tibetRebellion"

cutoff = 300 # median, 5 mins
df = df[df$Duration..in.seconds.<=cutoff,]
nrow(df) 

# standardize date format, turning both DKs and NAs into 'NA'

df$tibetRebellion = as.Date(df$tibetRebellion,"%m.%d",origin="1970-01-01")
df$tibetLiberation = as.Date(df$tibetLiberation,"%m.%d",origin="1970-01-01")
df$serfDay = as.Date(df$serfDay,"%m.%d",origin="1970-01-01")
df$urumqi = as.Date(df$urumqi,"%m.%d",origin="1970-01-01")
df$lama = as.Date(df$lama,"%m.%d",origin="1970-01-01")

df[c("tibetRebellionOff","tibetLiberationOff","serfDayOff","urumqiOff","lamaOff")] = NA

# automate 

mydates = c(as.Date("2020-03-10"),as.Date("2020-05-23"),as.Date("2020-03-28"),as.Date("2020-07-05"),as.Date("2020-07-06"))
myvars = c("tibetRebellion","tibetLiberation","serfDay","urumqi","lama")
offvars = c("tibetRebellionOff","tibetLiberationOff","serfDayOff","urumqiOff","lamaOff") # to count days off
dkvars = c("tibetRebellionDK","tibetLiberationDK","serfDayDK","urumqiDK","lamaDK") # to count DK responses

for(j in seq(myvars)){
  var = myvars[j]
  date = mydates[j]
  off = offvars[j]
  dk = dkvars[j]
  df[,off] = NA
  df[,dk] = NA
  print(paste(var,as.character(date),sep=": "))
  for(i in 1:nrow(df)){
    # if respondent guessed a date, count how far off it was
    if(is.na(df[i,var])==F){
      df[i,off] = abs(df[i,var] - date)
    }
    # if respondent did not guess a date, assign 1 in DK column
    if(is.na(df[i,var])==T){ 
      df[i,dk] = 1
    }
  }
}

myvars = c("Tibetan Rebellion","Tibetan Liberation","Serf Day","Xinjiang Uprising","Dalai Lama Birthday",
           "Youth Day","Christmas","National Day","CCP Founding","Army Day","New Year","Singles' Day","Valentine's Day")

mydata = c(
  sum(df$tibetRebellion==as.Date("2020-03-10"),na.rm=T)/nrow(df),
  sum(df$tibetLiberation==as.Date("2020-05-23"),na.rm=T)/nrow(df),
  sum(df$serfDay==as.Date("2020-03-28"),na.rm=T)/nrow(df),
  sum(df$urumqi==as.Date("2020-07-05"),na.rm=T)/nrow(df),
  sum(df$lama==as.Date("2020-07-06"),na.rm=T)/nrow(df),
  sum(as.Date(df$youthDay,"%m.%d",origin="1970-01-01")==as.Date("2020-05-04"),na.rm=T)/nrow(df),
  sum(as.Date(df$christmas,"%m.%d",origin="1970-01-01")==as.Date("2020-12-25"),na.rm=T)/nrow(df),
  sum(as.Date(df$nationalDay,"%m.%d",origin="1970-01-01")==as.Date("2020-10-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$ccpFounding,"%m.%d",origin="1970-01-01")==as.Date("2020-07-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$armyDay,"%m.%d",origin="1970-01-01")==as.Date("2020-08-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$newYear,"%m.%d",origin="1970-01-01")==as.Date("2020-01-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$singlesDay,"%m.%d",origin="1970-01-01")==as.Date("2020-11-11"),na.rm=T)/nrow(df),
  sum(as.Date(df$valentine,"%m.%d",origin="1970-01-01")==as.Date("2020-02-14"),na.rm=T)/nrow(df)
)

data = as.data.frame(cbind(myvars,mydata));colnames(data)=c("Anniversary","Accuracy")
data <- data[order(data$Accuracy,decreasing = F),]

par(mar=c(5, 8.5, 4, 1) + 0.1,xpd=TRUE)
barplot(as.numeric(as.vector(data$Accuracy)),
        names.arg = as.vector(data$Anniversary),
        las=2,horiz=T,
        col=c(rep("coral3",5),rep("cadetblue",8)),
        main = "Nationally Representative Sample",
        xlim=c(0,1)) 

filename = "SurveyAccuracy7.pdf" 
#dev.print(paste(figdir,filename,sep=""),device=pdf)

#### CCP Members

df = df[df$party %in% c("有，现役党员","有，但已退党"),]

mydata = c(
  sum(df$tibetRebellion==as.Date("2020-03-10"),na.rm=T)/nrow(df),
  sum(df$tibetLiberation==as.Date("2020-05-23"),na.rm=T)/nrow(df),
  sum(df$serfDay==as.Date("2020-03-28"),na.rm=T)/nrow(df),
  sum(df$urumqi==as.Date("2020-07-05"),na.rm=T)/nrow(df),
  sum(df$lama==as.Date("2020-07-06"),na.rm=T)/nrow(df),
  sum(as.Date(df$youthDay,"%m.%d",origin="1970-01-01")==as.Date("2020-05-04"),na.rm=T)/nrow(df),
  sum(as.Date(df$christmas,"%m.%d",origin="1970-01-01")==as.Date("2020-12-25"),na.rm=T)/nrow(df),
  sum(as.Date(df$nationalDay,"%m.%d",origin="1970-01-01")==as.Date("2020-10-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$ccpFounding,"%m.%d",origin="1970-01-01")==as.Date("2020-07-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$armyDay,"%m.%d",origin="1970-01-01")==as.Date("2020-08-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$newYear,"%m.%d",origin="1970-01-01")==as.Date("2020-01-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$singlesDay,"%m.%d",origin="1970-01-01")==as.Date("2020-11-11"),na.rm=T)/nrow(df),
  sum(as.Date(df$valentine,"%m.%d",origin="1970-01-01")==as.Date("2020-02-14"),na.rm=T)/nrow(df)
  )

data = as.data.frame(cbind(myvars,mydata));colnames(data)=c("Anniversary","Accuracy")
data <- data[order(data$Accuracy,decreasing = F),]

par(mar=c(5, 8.5, 4, 1) + 0.1,xpd=TRUE)
barplot(as.numeric(as.vector(data$Accuracy)),
        names.arg = as.vector(data$Anniversary),
        las=2,horiz=T,
        col=c(rep("coral3",5),rep("cadetblue",8)),
        main = "CCP Members",
        xlim=c(0,1)) 

filename = "SurveyAccuracy7Party.pdf" 
#dev.print(paste(figdir,filename,sep=""),device=pdf)


#### University Sample

df = read.csv(paste(datadir,'USC survey 3.csv',sep=""),sep="\t",header=T,stringsAsFactors=F);names(df)
df = df[3:nrow(df),]
df$Duration..in.seconds.=as.numeric(df$Duration..in.seconds.)

df = df[df$citizen=="中国" & df$edu1!="博士",];nrow(df) # 85

names(df)[names(df)=="Q33"] = "tibetRebellion"
df$tibetRebellion = as.Date(df$tibetRebellion,"%m.%d",origin="1970-01-01")
df$tibetLiberation = as.Date(df$tibetLiberation,"%m.%d",origin="1970-01-01")
df$serfDay = as.Date(df$serfDay,"%m.%d",origin="1970-01-01")
df$urumqi = as.Date(df$urumqi,"%m.%d",origin="1970-01-01")
df$lama = as.Date(df$lama,"%m.%d",origin="1970-01-01")

myvars = c("Tibetan Rebellion","Tibetan Liberation","Serf Day","Xinjiang Uprising","Dalai Lama Birthday",
           "Youth Day","Christmas","National Day","CCP Founding","Army Day","New Year","Singles' Day","Valentine's Day")

mydata = c(
  sum(df$tibetRebellion==as.Date("2020-03-10"),na.rm=T)/nrow(df),
  sum(df$tibetLiberation==as.Date("2020-05-23"),na.rm=T)/nrow(df),
  sum(df$serfDay==as.Date("2020-03-28"),na.rm=T)/nrow(df),
  sum(df$urumqi==as.Date("2020-07-05"),na.rm=T)/nrow(df),
  sum(df$lama==as.Date("2020-07-06"),na.rm=T)/nrow(df),
  sum(as.Date(df$youthDay,"%m.%d",origin="1970-01-01")==as.Date("2020-05-04"),na.rm=T)/nrow(df),
  sum(as.Date(df$christmas,"%m.%d",origin="1970-01-01")==as.Date("2020-12-25"),na.rm=T)/nrow(df),
  sum(as.Date(df$nationalDay,"%m.%d",origin="1970-01-01")==as.Date("2020-10-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$ccpFounding,"%m.%d",origin="1970-01-01")==as.Date("2020-07-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$armyDay,"%m.%d",origin="1970-01-01")==as.Date("2020-08-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$newYear,"%m.%d",origin="1970-01-01")==as.Date("2020-01-01"),na.rm=T)/nrow(df),
  sum(as.Date(df$singlesDay,"%m.%d",origin="1970-01-01")==as.Date("2020-11-11"),na.rm=T)/nrow(df),
  sum(as.Date(df$valentine,"%m.%d",origin="1970-01-01")==as.Date("2020-02-14"),na.rm=T)/nrow(df)
  )

data = as.data.frame(cbind(myvars,mydata));colnames(data)=c("Anniversary","Accuracy")
data <- data[order(data$Accuracy,decreasing = F),]

par(mar=c(5, 8.5, 4, 1) + 0.1,xpd=TRUE)
barplot(as.numeric(as.vector(data$Accuracy)),
        names.arg = as.vector(data$Anniversary),
        las=2,horiz=T,
        col=c(rep("coral3",5),rep("cadetblue",8)),
        main = "University Sample",
        xlim=c(0,1))

filename = "SurveyAccuracyUSC3.pdf" 
#dev.print(paste(figdir,filename,sep=""),device=pdf)


##############################################################
# Table 5: State Repression                                  #
##############################################################

# load protest level data
df = read.csv(paste(datadir,'df_nation_day_rr_3_repression.csv',sep=""),stringsAsFactors=F)
df = df[df$minorityProvince==0,]
df = df[df$province!="Beijing",]

df$recentprotests = df$protests.l1
df$awin = df$anniv
df$dwin = df$democratic
df$pwin = df$political
df$cwin = df$cultural
m0 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m0)
df$awin = df$awindow1
df$dwin = df$dwindow1
df$pwin = df$pwindow1
df$cwin = df$cwindow1
m1 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m1)
df$recentprotests = df$protestsSumPre2
df$awin = df$awindow2
df$dwin = df$dwindow2
df$pwin = df$pwindow2
df$cwin = df$cwindow2
m2 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m2)
df$recentprotests = df$protestsSumPre3
df$awin = df$awindow3
df$dwin = df$dwindow3
df$pwin = df$pwindow3
df$cwin = df$cwindow3
m3 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m3)
df$recentprotests = df$protestsSumPre4
df$awin = df$awindow4
df$dwin = df$dwindow4
df$pwin = df$pwindow4
df$cwin = df$cwindow4
m4 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m4)
df$recentprotests = df$protestsSumPre5
df$awin = df$awindow5
df$dwin = df$dwindow5
df$pwin = df$pwindow5
df$cwin = df$cwindow5
m5 = glm.nb(violence ~ awin + dwin + pwin + cwin + recentprotests + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(province),data=df);summary(m5)


stargazer(m0,m1,m2,m3,m4,m5,
          type="latex",
          title="State Repression",
          label="Table: Repression",
          font.size="small",
          omit=c("year","province"),
          dep.var.labels = "Repression",
          column.labels = c("0 Day","1 Day","2 Day","3 Day","4 Day","5 Day"),
          add.lines=list(c("Province Fixed Effects",rep("Yes",6))),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Recent Protests",
                               #"Minority Province",
                               #"Minority Province $\\times$ Separatist Anniversary",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"),
          notes=c("This analysis excludes red and green provinces from Figure 4.",
                  "Recent protests records the total number of protests in $t-1:t-N$, where $N$ is the length of the anniversary window."))


#####################################################
# Tables 6 and 7: IV Results                        #
#####################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

# descriptive stats
summary(df$protestsexMinoritySumPost7,na.rm=T)
sd(df$protestsexMinoritySumPost7,na.rm=T)

m1 <- ivreg(protestsexMinoritySumPost7 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost7 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage",
          label="Table: IV Results",
          font.size="small",
          omit=c("year"),
          dep.var.labels = "Protests$_{t+1:t+7}$",
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 


m1 <- lm(hexie ~ awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

# first stage 
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage",
          label="Table: First Stage",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 


#####################################################
# Figure 6: Simulated IV Results                    #
#####################################################

df$year = as.factor(df$year) # factorize for zelig

## stability
z.out1 <- zelig(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + year | awindow3 + protestsexMinority.l1 + year , data=df,model="ivreg")
summary(z.out1)
z.out1 %>% setx(wending = c(0,25),year=2014) %>% # choose a middle year since protest level trended upwards over time period
  sim() %>%
  plot(xlab="'Stability' References",ylab="Predicted Protests",
       ylim=c(0,40),xlim=c(0,25))

abline(v=mean(df$wending,na.rm=T),lty="dashed",col="red")
abline(v=mean(df$wending)+1*sd(df$wending),lty="dashed",col="red")
abline(v=mean(df$wending)+2*sd(df$wending),lty="dashed",col="red")
text(expression(mu),x=mean(df$wending,na.rm=T)+1,col="red",y=40)
text(expression(paste(mu, "+1",sigma)),x=mean(df$wending,na.rm=T)+1+1*sd(df$wending),col="red",y=40)
text(expression(paste(mu, "+2",sigma)),x=mean(df$wending,na.rm=T)+1+2*sd(df$wending),col="red",y=40)
filename = "IVPredictedProtestStabilityRR.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

## harmony
z.out1 <- zelig(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + year | awindow3+ protestsexMinority.l1 + year , data=df,model="ivreg")
z.out1 %>% setx(hexie = c(0,25),year=2014) %>% # range(df$hexie)
  sim() %>%
  plot(xlab="'Harmony' References",ylab="Predicted Protests",ylim=c(0,40),
       xlim=c(0,25))

abline(v=mean(df$hexie,na.rm=T),lty="dashed",col="red")
abline(v=mean(df$hexie)+1*sd(df$hexie),lty="dashed",col="red")
abline(v=mean(df$hexie)+2*sd(df$hexie),lty="dashed",col="red")
text(expression(mu),x=mean(df$hexie,na.rm=T)+1,col="red",y=40)
text(expression(paste(mu, "+1",sigma)),x=mean(df$hexie,na.rm=T)+1+1*sd(df$hexie),col="red",y=40)
text(expression(paste(mu, "+2",sigma)),x=mean(df$hexie,na.rm=T)+1+2*sd(df$hexie),col="red",y=40)
filename = "IVPredictedProtestHarmonyRR.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)


##################################################### 
##################################################### 
##################################################### 
#                ONLINE APPENDIX                    #
##################################################### 
##################################################### 
##################################################### 

######################################################
# Figure 1: Term Frequency Counts                    #
######################################################

hist(df$hexie,col="cadetblue",main="Histogram of 'Harmony'",xlab="References",xlim=c(0,170),breaks=100)
abline(v=mean(df$hexie,na.rm=T),lty="dashed",col="red")
text(expression(mu),x=mean(df$hexie,na.rm=T)+ 8,col="red",y=1100,cex=1.5)
filename = "HistHarmony.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

hist(df$wending,col="cadetblue",main="Histogram of 'Stability'",xlab="References",xlim=c(0,170),breaks=50)
abline(v=mean(df$wending,na.rm=T),lty="dashed",col="red")
text(expression(mu),x=mean(df$wending,na.rm=T)+ 8,col="red",y=625,cex=1.5)
filename = "HistStability.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

######################################################
# Figure 2: Provincial Protest Rates                 #
######################################################

#### Total Protests 

df = read.csv(paste(datadir,'df_nation_day_rr_3_repression.csv',sep=""),stringsAsFactors=F)
t = as.data.frame(rep(NA,length(unique(df$province))))
rownames(t) = sort(unique(df$province))
for(i in 1:nrow(t)){
  t[i,] = nrow(df[df$province==rownames(t)[i],])
}

t = t(t)
par(mar=c(9,6,0,0))
barplot(t,las=2,col="coral4",ylim=c(0,2700))
mtext("Total Protests", side=2, line=4, col="Black")
#dev.print(file=paste(figdir,"protestProvince.pdf",sep=""),device=pdf)

#### Protests Per Capita 

# divide total protests by avg pop in that province during time period
df = read.csv(paste(datadir,'df_nation_day_rr_3_repression.csv',sep=""),stringsAsFactors=F)
t = as.data.frame(rep(NA,length(unique(df$province))))
rownames(t) = sort(unique(df$province))
for(i in 1:nrow(t)){
  t[i,] = nrow(df[df$province==rownames(t)[i],])
}
colnames(t) = c("totalProtests")
t$avgPop = NA
for(i in 1:nrow(t)){
  t$avgPop[i] = mean(df$Population.resident.year.end.provincial[df$province==rownames(t)[i]],na.rm=T)
}

t$protestsPerCapita = t$totalProtests/(t$avgPop*10000)
t$totalProtests = NULL
t$avgPop = NULL
t = t(t)

par(mar=c(9,7,1,0))
barplot(t,las=2,col="coral4")
mtext("Annual Protests Per Capita", side=2, line=6, col="Black")
#dev.print(file=paste(figdir,"protestProvincePerCapitaPerYear.pdf",sep=""),device=pdf)

######################################################
# Figure 3: Protests Over Time                       #
######################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

# move to nation-day level 
nat = ddply(df,.(date),summarize,
            protests=sum(protests,na.rm=T),
            clb=sum(clb,na.rm=TRUE),
            elf=sum(elf,na.rm=TRUE)            
)
nat$year = as.numeric(substr(nat$date,1,4))
nrow(nat[nat$protests>0,])/nrow(nat)


plot(nat$date,nat$clb,col="lightgray",pch=3,bty="n",xlab="Year",ylab="Protests Per Day",cex=1.4,cex.lab=1.4,cex.axis=1.4)
points(nat$date,nat$elf,col="lightgray")

lines(loess.smooth(nat$date[nat$year>=2011],nat$clb[nat$year>=2011],span=.1), col="coral4",lwd=2)
lines(loess.smooth(nat$date[nat$year<2011],nat$elf[nat$year<2011],span=.1), col="navy",lwd=2)

legend("topleft",col=c("navy","coral4"),c("ELF","CLB"),lwd=2,bty="n",cex=1.5)
#dev.print(file=paste(figdir,"protestRecords.pdf",sep=""),device=pdf)

######################################################
# Figure 5: Outmigration from Tibet                  #
######################################################

dat = read.csv(paste(datadir,'df_province_day_npc_cdp_rr_2.csv',sep=""),stringsAsFactors=F) 
ann = ddply(dat, .(year,province), summarize, pop = mean(Population.resident.year.end)) # Total Population (year-end)(10000 persons)
ann$pop = ann$pop*10000
ann = ann[ann$year==2010,]
ann
sum(ann$pop) # 1,333,850,000
ann = rbind(c(2010,"Nation",sum(ann$pop)),ann)

dat = read.csv(paste(datadir,'PopulationRegisteredInOtherProvincesCensus2010.csv',sep=""),stringsAsFactors=F) 
ann = merge(ann,dat,by="province")
ann$pop = as.numeric(ann$pop)
ann$migrantsTibet = as.numeric(ann$migrantsTibet)
ann$migrantsXinjiang = as.numeric(ann$migrantsXinjiang)
ann$minorityProvince = 0
ann$minorityProvince[ann$province %in% c("Tibet","Xinjiang","Sichuan","Gansu","Qinghai","Inner.Mongolia","Ningxia","Yunnan")]  = 1

211901 / sum(ann$pop[ann$minorityProvince==0 & ann$province!="Nation"]) #0.0001891551

ann$migrants = ann$migrantsTibet + ann$migrantsXinjiang
ann = ann[ann$province!="Nation",]

ann$province[ann$province=="Inner.Mongolia"] = "Inner Mongolia"
red = c("Xinjiang","Tibet")
green = c("Inner Mongolia","Ningxia","Gansu","Qinghai","Sichuan","Yunnan","Beijing")
white = c("Heilongjiang","Jilin","Liaoning","Hebei","Tianjin","Shanxi","Shandong","Shaanxi","Henan","Anhui","Jiangsu","Shanghai","Hubei","Chongqing","Zhejiang","Jiangxi","Hunan","Guizhou","Fujian","Guangxi","Guangdong","Hainan")
ann$color[ann$province %in% red] = "coral3"
ann$color[ann$province %in% green] = "cadetblue"
ann$color[ann$province %in% white] = "beige"

ann$share = ann$migrantsTibet / ann$pop
ann = ann[order(ann$share),]
rownames(ann) = ann$province

par(mar=c(8,5,3,0)) 
barplot(ann$share,
        names.arg=rownames(ann),las=2,col=ann$color,
        main = "Outmigration from Tibet",
        ylim=c(0,0.002)) 

mtext("Current Residence", side=1, line=7)
mtext("Migrants / Provincial Population", side=2, line=4)

filename = "OutmigrationTibet2010Share.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)

######################################################
# Figure 6: Outmigration from Xinjiang               #
######################################################

ann$share = ann$migrantsXinjiang / ann$pop
ann = ann[order(ann$share),]
rownames(ann) = ann$province

par(mar=c(8,5,3,0)) 
barplot(ann$share,
        names.arg=rownames(ann),las=2,col=ann$color,
        main = "Outmigration from Xinjiang",
        ylim=c(0,0.002)) 

mtext("Current Residence", side=1, line=7)
mtext("Migrants / Provincial Population", side=2, line=4)

filename = "OutmigrationXinjiang2010Share.pdf"
#dev.print(paste(figdir,filename,sep=""),device=pdf)


######################################################
# Table 3: Threats and Propaganda                    #
######################################################

##### Topics

alpha = .25
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.25s = lm(social.stability ~ hi, data=df)
m.25t = lm(threats.topic ~ hi, data=df)

alpha = .5
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.5s = lm(social.stability ~ hi, data=df)
m.5t = lm(threats.topic ~ hi, data=df)

alpha = .75
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.75s = lm(social.stability ~ hi, data=df)
m.75t = lm(threats.topic ~ hi, data=df)

alpha = 1
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1s = lm(social.stability ~ hi, data=df)
m1t = lm(threats.topic ~ hi, data=df)

alpha = 1.25
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.25s = lm(social.stability ~ hi, data=df)
m1.25t = lm(threats.topic ~ hi, data=df)

alpha = 1.5
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.5s = lm(social.stability ~ hi, data=df)
m1.5t = lm(threats.topic ~ hi, data=df)

alpha = 1.75
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.75s = lm(social.stability ~ hi, data=df)
m1.75t = lm(threats.topic ~ hi, data=df)

alpha = 2
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m2s = lm(social.stability ~ hi, data=df)
m2t = lm(threats.topic ~ hi, data=df)

stargazer(m.25s,m.5s,m.75s,m1s,m1.25s,m1.5s,m1.75s,m2s,title="Bivariate Threats and Propaganda",label="Table: social stability alphas",font.size="small")
stargazer(m.25t,m.5t,m.75t,m1t,m1.25t,m1.5t,m1.75t,m2t)


##### References

alpha = .25
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.25s = lm(wending ~ hi, data=df)
m.25t = lm(hexie ~ hi, data=df)

alpha = .5
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.5s = lm(wending ~ hi, data=df)
m.5t = lm(hexie ~ hi, data=df)

alpha = .75
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m.75s = lm(wending ~ hi, data=df)
m.75t = lm(hexie ~ hi, data=df)

alpha = 1
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1s = lm(wending ~ hi, data=df)
m1t = lm(hexie ~ hi, data=df)

alpha = 1.25
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.25s = lm(wending ~ hi, data=df)
m1.25t = lm(hexie ~ hi, data=df)

alpha = 1.5
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.5s = lm(wending ~ hi, data=df)
m1.5t = lm(hexie ~ hi, data=df)

alpha = 1.75
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m1.75s = lm(wending ~ hi, data=df)
m1.75t = lm(hexie ~ hi, data=df)

alpha = 2
cutpoint <- mean(df$tone, na.rm=T) + alpha*sd(df$tone, na.rm=T);df$hi = 0;df$hi[df$tone>=cutpoint] = 1
m2s = lm(wending ~ hi, data=df)
m2t = lm(hexie ~ hi, data=df)

stargazer(m.25s,m.5s,m.75s,m1s,m1.25s,m1.5s,m1.75s,m2s,title="Bivariate Threats (Terms) and Propaganda",label="Table: social stability alphas terms",font.size="small")
stargazer(m.25t,m.5t,m.75t,m1t,m1.25t,m1.5t,m1.75t,m2t)


######################################################
# Table 4: Naive Estimates of Threats and Protest    #
######################################################

m1 <- lm(protestsexMinoritySumPost7 ~ social.stability + protestsexMinority.l1 + awindow3 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m1)
m2 <- lm(protestsexMinoritySumPost7 ~ threats.topic + protestsexMinority.l1 + awindow3 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m2)
m3 <- lm(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + awindow3 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)
m4 <- lm(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + awindow3 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m4)

stargazer(m1,m2,m3,m4,type="latex",
          title="Naive Estimates of Threats and Protest",
          label="Table: Naive",
          font.size="small",
          omit=c("year"),
          add.lines=list(c("Year Fixed Effects","Yes","Yes","Yes","Yes")),
          dep.var.labels = "Protests$_{t+1:t+7}$",
          covariate.labels = c("Social Stability",
                               "Law Enforcement",
                               "`Stability'",
                               "`Harmony'",
                               "Protests$_{t-1}$",
                               "Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 


######################################################
# Table 5: IV Sensitivity Analysis                   #
######################################################

### adapted from Zhu 2016

df$year = as.factor(df$year)
attach(df)

mean(protestsexMinoritySumPost7,na.rm=T)
sd(protestsexMinoritySumPost7,na.rm=T)
median(protestsexMinoritySumPost7,na.rm=T)


##################
#### MODEL 1 #####
##################

ivsensianalysisM1<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, -1, length.out = n.sim) # specify a set of values for gamma. only negative violations make sense for us
  k<-2      # M1
  
  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(wending ~ awindow3,x=TRUE, data=mydata) 
    
    wending.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~wending.hat,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"wending.hat"]<-mydata$wending
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.wending<-betas[2,]
  data.frame(cbind(gamma,betahat.wending,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata1 <- na.omit(data.frame(protestsexMinoritySumPost7, wending, 
                              awindow3, 
                              stringsAsFactors=F))

output.model1 <- ivsensianalysisM1(mydata1)
head(output.model1[output.model1$upperbound.ci95<=0,])
#      gamma betahat.wending lowerbound.ci95 upperbound.ci95
#480  -1.160321      -0.8766432       -1.750262    -0.003024248

##################
#### MODEL 2 ##### 
##################

ivsensianalysisM2<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, -1, length.out = n.sim) # specify a set of values for gamma. only negative violations make sense for us
  k<-11    # accounting for year fixed effects (M2)
  
  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(wending ~ awindow3 + protestsexMinority.l1 + year
             ,x=TRUE, data=mydata) 
    
    wending.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~wending.hat + protestsexMinority.l1 + year
             ,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"wending.hat"]<-mydata$wending
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.wending<-betas[2,]
  data.frame(cbind(gamma,betahat.wending,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata2 <- na.omit(data.frame(protestsexMinoritySumPost7, wending, 
                              protestsexMinority.l1,
                              year,
                              awindow3, 
                              stringsAsFactors=F))

output.model2 <- ivsensianalysisM2(mydata2)
head(output.model2[output.model2$upperbound.ci95<=0,])
#         gamma betahat.wending lowerbound.ci95 upperbound.ci95
# 389 -1.889780      -0.4380366      -0.8759748   -9.851437e-05

##################
#### MODEL 3 ##### 
##################

# if you include year fixed effects and the annual covars, get a system is computationally singular error on solve()
# this is caused by multicollinearity 
# https://stats.stackexchange.com/questions/90020/random-effects-model-with-plm-system-is-computationally-singular-error
# so omit annual covariates 

ivsensianalysisM3<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, -1, length.out = n.sim) # specify a set of values for gamma. only negative violations make sense for us
  k<-14      # M3 with YFE, without annual covars
  
  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 
             #+ logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate 
             + year
             ,x=TRUE, data=mydata) 
    
    
    wending.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~wending.hat + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 
             #+ logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate 
             + year
             ,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"wending.hat"]<-mydata$wending
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.wending<-betas[2,]
  data.frame(cbind(gamma,betahat.wending,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata3 <- na.omit(data.frame(protestsexMinoritySumPost7, wending, 
                              protestsexMinority.l1,
                              dwindow3, pwindow3, cwindow3, 
                              #logGRP, logPop, Population.rural.share, Sample.survey.sex.ratio, Urban.unemployment.rate,
                              year,
                              awindow3, 
                              stringsAsFactors=F))

output.model3 <- ivsensianalysisM3(mydata3)

head(output.model3[output.model3$upperbound.ci95<=0,])
# gamma betahat.wending lowerbound.ci95 upperbound.ci95
# 427 -1.585170      -0.4952726      -0.9890452    -0.001500037

########################
#### MODEL 1 hexie #####
########################

ivsensianalysisM1h<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, 1, length.out = n.sim) # only negative violations make sense for us
  k<-2      # M1
  
  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(hexie ~ awindow3 ,x=TRUE, data=mydata) 
    
    hexie.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~hexie.hat ,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"hexie.hat"]<-mydata$hexie
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.hexie<-betas[2,]
  data.frame(cbind(gamma,betahat.hexie,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata1h <- na.omit(data.frame(protestsexMinoritySumPost7, hexie, 
                               awindow3, 
                               stringsAsFactors=F))

output.model1h <- ivsensianalysisM1h(mydata1h)
head(output.model1h[output.model1h$upperbound.ci95<=0,])
#      gamma betahat.hexie lowerbound.ci95 upperbound.ci95
# 379 -0.4549098     -1.298143       -2.595842   -0.0004431444

########################
#### MODEL 2 hexie ##### 
########################

ivsensianalysisM2h<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, 1, length.out = n.sim) # only negative violations make sense for us
  k<-11    # accounting for year fixed effects (M2)

  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(hexie ~ awindow3 + protestsexMinority.l1 + year
             ,x=TRUE, data=mydata) 
    
    hexie.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~hexie.hat + protestsexMinority.l1 + year
             ,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"hexie.hat"]<-mydata$hexie
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.hexie<-betas[2,]
  data.frame(cbind(gamma,betahat.hexie,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata2h <- na.omit(data.frame(protestsexMinoritySumPost7, hexie, 
                               protestsexMinority.l1,
                               year,
                               awindow3, 
                               stringsAsFactors=F))

output.model2h <- ivsensianalysisM2h(mydata2h)
head(output.model2h[output.model2h$upperbound.ci95<=0,])
#      gamma betahat.hexie lowerbound.ci95 upperbound.ci95
# 291 -1.513026    -0.6562126       -1.312151   -0.0002737871

########################
#### MODEL 3 hexie ##### 
########################

# if you include year fixed effects and the annual covars, get a system is computationally singular error on solve()
# this is caused by multicollinearity 
# https://stats.stackexchange.com/questions/90020/random-effects-model-with-plm-system-is-computationally-singular-error
# so omit annual covariates 

ivsensianalysisM3h<-function(mydata){
  
  ##specify the set of possible gamma values
  n.sim<-500   # number of simulations
  gamma<-seq(-5, 1, length.out = n.sim) # only negative violations make sense for us
  k<-14      # M3 with YFE, without annual covars
  
  betas<-matrix(NA,k,n.sim) # matrix to store point estimates
  ses<-matrix(NA,k,n.sim)  # matrix to store standard errors
  
  for (i in 1:n.sim){
    
    ##first stage regression
    fit1<-lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 
             #+ logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate 
             + year
             ,x=TRUE, data=mydata) 
    
    
    hexie.hat<-fitted(fit1)
    
    zmat<-fit1$x
    
    ##remove the direct effect of the instrument on the dependent variable 
    y<-mydata[,1]-gamma[i]*mydata$awindow3
    
    ##second stage regression
    fit2<-lm(y~hexie.hat + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 
             #+ logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate 
             + year
             ,x=TRUE, data=mydata)
    
    ##correct SEs for the second stage regression
    xmat<-fit2$x
    xmat[,"hexie.hat"]<-mydata$hexie
    
    resds<-y - xmat%*%coef(fit2)
    N<-nrow(mydata)
    omega<-diag(diag((resds)%*%t(resds)),N)
    vmat<-solve(t(xmat)%*%zmat%*%solve(t(zmat)%*%omega%*%zmat)%*%t(zmat)%*%xmat)
    
    se.gmm<-sqrt(diag(vmat))
    betas[,i]<-coef(fit2)
    ses[,i]<-se.gmm
  }
  
  ###calculate the 95% confidence interval of the point estimate
  upperbound.ci95<-array(NA,n.sim)
  lowerbound.ci95<-array(NA,n.sim)
  for (s in 1:n.sim){
    upperbound.ci95[s]<-betas[2,s]+1.96*ses[2,s]
    lowerbound.ci95[s]<-betas[2,s]-1.96*ses[2,s]
  }
  betahat.hexie<-betas[2,]
  data.frame(cbind(gamma,betahat.hexie,lowerbound.ci95,upperbound.ci95) )
}

# DV must come first in this list!
mydata3h <- na.omit(data.frame(protestsexMinoritySumPost7, hexie, 
                               protestsexMinority.l1,
                               dwindow3, pwindow3, cwindow3, 
                               #logGRP, logPop, Population.rural.share, Sample.survey.sex.ratio, Urban.unemployment.rate,
                               year,
                               awindow3, 
                               stringsAsFactors=F))

output.model3h <- ivsensianalysisM3h(mydata3h)
head(output.model3h[output.model3h$upperbound.ci95<=0,])
#      gamma betahat.hexie lowerbound.ci95 upperbound.ci95
# 369 -0.575150301    -1.1216496       -2.242991   -0.0003081151

######################################################
# Figure 8: Distinctive Protest Discourse            #
######################################################

dat = read.csv(paste(datadir,'CLBscattertextDataTranslated.csv',sep=''),sep=',',stringsAsFactors=F);head(dat)
colnames(dat)[colnames(dat)=="term"] = "termChn"
colnames(dat)[colnames(dat)=="termEng"] = "term"
dat$term = tolower(dat$term)

dat = dat[tolower(dat$term) %in% 
            c("research club","city renovation","director of sales",
              "senior courtyard","where is the road?","page total",
              "article most","follow numbers","welcome follow","scan code follow",
              "reply","month reply","day reply","reply out","reply like",
              "rotate rotate","below dimension","client link"
            )==F,]

############ Sensitive Date Scattertext

dat$term[dat$term=="experiment school"] = "experimental school"

dat$preScale <- dat$precount / dat$predocs		
dat$postScale <- dat$postcount / dat$postdocs	

hi <- 0.70
lo <- 1 - hi
max <- 1.0
pre <- dat[dat$x<lo & dat$y>hi & dat$y<max,]; nrow(pre)
post <- dat[dat$x>hi & dat$x<max & dat$y<lo,]; nrow(post)
both <- dat[dat$x>hi & dat$y>hi,]
neither <- dat[dat$x<lo & dat$y<lo,]

sample <- seq(from=1, to=100, by=1)
input.pre <- pre[order(-pre$preScale), c("preScale")][sample]
input.pre = na.omit(input.pre)
names(input.pre) <- na.omit(pre[order(-pre$preScale), c("term")][sample])

names = tolower(names(input.pre))
mycolors = rep("grey",length(input.pre))

#pdf(paste(figdir,'CLBsensitiveRestrictedEng.pdf',sep=""), family="Helvetica")
layout(matrix(c(1,2), nrow=2), heights=c(0.3,3))
par(mar=c(0,0,0,0))
plot.new()
text(x=0.5, y=0.5, "Separatist Anniversary Protests", cex=1.75)
set.seed(1234)
wordcloud(scale=c(2.6,.5),words=names, freq=input.pre, min.freq=0, max.words=200, random.order=F, rot.per=0, colors=mycolors, ordered.colors=T,main=NA)
#dev.off()


######### Regular Date Scattertext

sample <- seq(from=1, to=100, by=1)
input.post <- post[order(-post$postScale), c("postScale")][sample]
names(input.post) <- post[order(-post$postScale), c("term")][sample]
names = tolower(names(input.post))
mycolors = rep("grey",length(input.post))

#pdf(paste(figdir,'CLBregularRestrictedEng.pdf',sep=""), family="Helvetica")
layout(matrix(c(1,2), nrow=2), heights=c(0.3,3))
par(mar=c(0,0,0,0))
plot.new()
text(x=0.5, y=0.5, "Other Protests", cex=1.75)
set.seed(1234)
wordcloud(scale=c(2.8,.5),words=names, freq=na.omit(input.post), random.order=F, rot.per=0, colors=mycolors, ordered.colors=T, main=NA) # brewer.pal(8, "Dark2")
#dev.off()

######################################################
# Table 6: Summary Statistics                        #
######################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

desc = df[c("social.stability","threats.topic","wending","hexie",
            "anniv","democratic","political","cultural",
            "protests",
            "protestsexMinority",
            "protestsSumPost7",
            "protestsexMinoritySumPost7",
            "logGRP","logPop","Population.rural.share","Sample.survey.sex.ratio","Urban.unemployment.rate")]

out = as.data.frame(t(stat.desc(desc)));out
out = out[c("nbr.val","nbr.na","min","max","mean","std.dev")]

colnames(out) = c("Nbr. Val.","Nbr. NA","Min","Max","Mean","Std. Dev")
rownames(out) = c("Social Stability$_{t}$",
                  "Law Enforcement$_{t}$",
                  "``Stability''$_{t}$",
                  "``Harmony''$_{t}$",
                  "Separatist Anniversary$_{t}$",
                  "Democratic Anniversary$_{t}$",
                  "Political Anniversary$_{t}$",
                  "Cultural Anniversary$_{t}$",
                  "Protests$_{t}$",
                  "Protests$^{Han}_{t}$",
                  "Protests$_{t+1:t+7}$",
                  "Protests$^{Han}_{t+1:t+7}$",
                  "Log GRP$_{s}$",
                  "Log Population$_{s}$",
                  "Rural Population Share$_{s}$",
                  "Sex Ratio$_{s}$",
                  "Urban Unemployment Rate$_{s}$"
)

out$Source = c(rep("Authors",8),
               rep("Elfstrom, CLB",4),
               rep("NBS",5))

xtable(out,
       caption = "Summary Statistics",
       label = "Table: Variable definitions")

######################################################
# Table 7: Protest Level Summary Statistics          #
######################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3_repression.csv',sep=""),stringsAsFactors=F)

desc = df[c("violence",
            "anniv","democratic","political","cultural",
            "protests.l1",
            "logGRP","logPop","Population.rural.share","Sample.survey.sex.ratio","Urban.unemployment.rate")]

out = as.data.frame(t(stat.desc(desc)));out
out = out[c("nbr.val","nbr.na","min","max","mean","std.dev")]

colnames(out) = c("Nbr. Val.","Nbr. NA","Min","Max","Mean","Std. Dev")
rownames(out) = c("Repression$_{j}$",
                  "Separatist Anniversary$_{t}$",
                  "Democratic Anniversary$_{t}$",
                  "Political Anniversary$_{t}$",
                  "Cultural Anniversary$_{t}$",
                  "Protests$_{t-1}$",
                  #"Protests$^{Han}_{t}$",
                  "Log GRP$_{s}$",
                  "Log Population$_{s}$",
                  "Rural Population Share$_{s}$",
                  "Sex Ratio$_{s}$",
                  "Urban Unemployment Rate$_{s}$"
)

out$Source = c("Elfstrom, CLB",
               rep("Authors",4),
               "Elfstrom, CLB",
               rep("NBS",5))
xtable(out,
       caption = "Summary Statistics Protest Level",
       label = "Table: Variable definitions protest level")

######################################################
# Correlation Between Term Counts and Topic Models   #
######################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

m1 = lm(wending ~ social.stability,data=df);summary(m1)
m2 = lm(hexie ~ social.stability,data=df);summary(m2) 
m3 = lm(wending ~ threats.topic,data=df);summary(m3) 
m4 = lm(hexie ~ threats.topic,data=df);summary(m4)

stargazer(m1,m2,m3,m4,type="latex",title="Correlation Between Term Counts and Topic Models",label="Table: Correlations",font.size="small")


######################################################
# Tables 9 and 10: IV Results (Next Day)             #
######################################################

m1 <- ivreg(protestsexMinority.f1 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinority.f1 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinority.f1 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinority.f1 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinority.f1 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinority.f1 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Next Day)",
          label="Table: IV Results (Next Day)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Next Day)",
          label="Table: First Stage (Next Day)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))


#########################################################
# Tables 11 and 12: IV Results (Next Two Days)          #
#########################################################

# 2,3 day sum of protests, ex minority provinces
df[c("protestsexMinoritySumPost7","protestsexMinoritySumPost3")] = NA
for(i in 1:(nrow(df)-3)){
  df$protestsexMinoritySumPost2[i] = sum(na.omit(df$protestsexMinority[(i+1):(i+2)])) # no.omit is consequential here
  df$protestsexMinoritySumPost3[i] = sum(na.omit(df$protestsexMinority[(i+1):(i+3)])) # no.omit is consequential here
}    

m1 <- ivreg(protestsexMinoritySumPost2 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost2 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost2 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost2 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost2 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost2 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Next Two Days)",
          label="Table: IV Results (Next 2 Days)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+2}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Next Two Days)",
          label="Table: First Stage (Next 2 Days)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))


#########################################################
# Tables 13 and 14: IV Results (Next Three Days)        #
#########################################################

m1 <- ivreg(protestsexMinoritySumPost3 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost3 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost3 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost3 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost3 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost3 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Next Three Days)",
          label="Table: IV Results (Next 3 Days)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+3}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Next Three Days)",
          label="Table: First Stage (Next 3 Days)",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))

#########################################################
# Tables 15 and 16: IV Results (2009-2012)              #
#########################################################

df = df[df$year<=2012,]; table(df$year)

m1 <- ivreg(protestsexMinoritySumPost7 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost7 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (2009-2012)",
          label="Table: IV Results 2009-2012",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+7}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (2009-2012)",
          label="Table: First Stage 2009-2012",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))


#########################################################
# Tables 17 and 18: IV Results (Ex Guangdong)           #
#########################################################

df = read.csv(paste(datadir,'df_nation_day_rr_2_ex_guangdong.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

m1 <- ivreg(protestsexMinoritySumPost7 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost7 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Ex Guangdong)",
          label="Table: IV Results Ex Guangdong",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+7}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Ex Guangdong)",
          label="Table: First Stage Ex Guangdong",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))


#########################################################
# Tables 19 and 20: IV Results (Ex Winter)              #
#########################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

# drop winter months
df = df[substr(df$date,6,7) %in% c("12","01","02")==F,] 

m1 <- ivreg(protestsexMinoritySumPost7 ~ hexie | awindow3, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost7 ~ wending | awindow3, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Ex Winter)",
          label="Table: IV Results Ex Winter",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+7}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

m1 <- lm(hexie ~ awindow3, data=df);summary(m1)
m2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow3, data=df);summary(m4)
m5 <- lm(wending ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Ex Winter)",
          label="Table: First Stage Ex Winter",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))


########################################################################################
# Tables 21 and 22: IV Results (Ex 3/10 Tibetan Rebellion, 5/23 Tibetan Liberation)    #
########################################################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

df$md = substr(df$date,6,10)
unique(df$md[df$awindow3==1])

# redefine awindow5
df$awindow5[df$md %in% c("03-05","03-06","03-07","03-08","03-09","03-10","03-11","03-12","03-13","03-14","03-15")] = NA
df$awindow5[df$md %in% c("05-18","05-19","05-20","05-21","05-22","05-23","05-24","05-25","05-26","05-27","05-28")] = NA

m1 <- ivreg(protestsexMinoritySumPost7 ~ hexie | awindow5, data=df);summary(m1,diagnostics = TRUE)
m2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + as.factor(year) | awindow5 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2,diagnostics=TRUE)
m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow5 + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3,diagnostics=TRUE)

m4 <- ivreg(protestsexMinoritySumPost7 ~ wending | awindow5, data=df);summary(m4, diagnostics=TRUE)
m5 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + as.factor(year) | awindow5 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5,diagnostics=TRUE)
m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year) | awindow5 + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6,diagnostics=TRUE)

# second stage
stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: Second Stage (Ex 3/10 Tibetan Rebellion, 5/23 Tibetan Liberation)",
          label="Table: IV Results Ex Tibetan Rebellion",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("Protests$_{t+1:t+7}$"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("wending","hexie","dwindow5","pwindow5","cwindow5","protestsexMinority.l1"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 


m1 <- lm(hexie ~ awindow5, data=df);summary(m1)
m2 <- lm(hexie ~ awindow5 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(hexie ~ awindow5 + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(wending ~ awindow5, data=df);summary(m4)
m5 <- lm(wending ~ awindow5 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(wending ~ awindow5 + protestsexMinority.l1 + dwindow5 + pwindow5 + cwindow5 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="IV Results: First Stage (Ex Tibetan Rebellion)",
          label="Table: First Stage Ex Tibetan Rebellion",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow5","dwindow5","pwindow5","cwindow5","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate"))



#############################################################
# Tables 23 and 24: IV Results (Pre-emptive Detentions)     #
#############################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

# Data and prep code from Truex

cecc = read.csv(paste(datadir,'cecc.raw.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
cecc$date = as.Date(cecc$date.of.detention,format="%Y/%m/%d");head(cecc$date)
length(na.omit(cecc$date))/nrow(cecc) # only 67% of obs are fully dated

cecc$issue.flg<-0
cecc$issue.flg[grepl("FG", cecc$issue.category)==TRUE]<-1
cecc$issue.flg<-as.numeric(cecc$issue.flg)
sum(cecc$issue.flg)

cecc$issue.ethtib<-0
cecc$issue.ethtib[grepl("Tibetan", cecc$ethnic.group)==TRUE]<-1
cecc$issue.ethtib<-as.numeric(cecc$issue.ethtib)
sum(cecc$issue.ethtib)

cecc$issue.ethuyg<-0
cecc$issue.ethuyg[grepl("Uyghur", cecc$ethnic.group)==TRUE]<-1
cecc$issue.ethuyg<-as.numeric(cecc$issue.ethuyg)
sum(cecc$issue.ethuyg)

cecc$issue.dem<-0
cecc$issue.dem[grepl("dem", cecc$issue.category)==TRUE]<-1
cecc$issue.dem<-as.numeric(cecc$issue.dem)
sum(cecc$issue.dem)

cecc$issue.dem.ext1<-0
cecc$issue.dem.ext1[grepl("dem", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext1[grepl("6489", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext1<-as.numeric(cecc$issue.dem.ext1)
sum(cecc$issue.dem.ext1)

cecc$issue.dem.ext2<-0
cecc$issue.dem.ext2[grepl("dem", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("6489", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("spch", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("civil", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("info", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("assoc", cecc$issue.category)==TRUE]<-1
cecc$issue.dem.ext2[grepl("Uyghur", cecc$ethnic.group)==TRUE]<-0
cecc$issue.dem.ext2[grepl("Tibetan", cecc$ethnic.group)==TRUE]<-0
cecc$issue.dem.ext2[grepl("FG", cecc$issue.category)==TRUE]<-0
cecc$issue.dem.ext2<-as.numeric(cecc$issue.dem.ext2)
sum(cecc$issue.dem.ext2)

# ignore NA dates in cecc
cecc = cecc[is.na(cecc$date)==F,];nrow(cecc)

# assume detentions are 0 if not measured within CECC date range
head(sort(cecc$date))
tail(sort(cecc$date))
df$detentions = NA
for(i in 1:nrow(df)){
  if(df$date[i]<min(cecc$date)){next}
  if(df$date[i]>max(cecc$date)){next}
  df$detentions[i] = nrow(cecc[cecc$date==df$date[i],])
}

# collapse to day level
cecc = ddply(cecc, .(date), plyr::summarize, 
             issue.flg=sum(issue.flg,na.rm=T),
             issue.ethtib=sum(issue.ethtib,na.rm=T),
             issue.ethuyg=sum(issue.ethuyg,na.rm=T),
             issue.dem=sum(issue.dem,na.rm=T),
             issue.dem.ext1=sum(issue.dem.ext1,na.rm=T),
             issue.dem.ext2=sum(issue.dem.ext2,na.rm=T))

df = join(df,cecc,type="left",by=c("date"));nrow(df)

# create +/-3 detention windows

df$detentionsdich = df$detentions
df$detentionsdich[df$detentions>0] = 1

df$detwindow3 = NA
n = 3
for(i in (n+1):nrow(df)){
  if(df$date[i]<min(cecc$date)){next}
  if(df$date[i]>max(cecc$date)){next}
  if(1 %in% df$detentionsdich[(i-n):(i+n)]){
    df$detwindow3[i] = 1
  }
  if((1 %in% df$detentionsdich[(i-n):(i+n)]) == F){
    df$detwindow3[i] = 0
  }
}

# number of detentions over past 1/2 weeks 

df$detentionsPre7 = NA
for(i in 8:nrow(df)){
  df$detentionsPre7[i] = sum(df$detentions[(i-1):(i-7)],na.rm=T)
}

df$detentionsPre14 = NA
for(i in 15:nrow(df)){
  df$detentionsPre14[i] = sum(df$detentions[(i-1):(i-14)],na.rm=T)
}
df$detentionsPre30 = NA
for(i in 31:nrow(df)){
  df$detentionsPre30[i] = sum(df$detentions[(i-1):(i-30)],na.rm=T)
}


m3 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year), data=df);summary(m3,diagnostics=TRUE)
m3.1 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year), data=df);summary(m3.1,diagnostics=TRUE)
m3.2 <- ivreg(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year), data=df);summary(m3.2,diagnostics=TRUE)

m6 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year), data=df);summary(m6,diagnostics=TRUE)
m6.1 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year), data=df);summary(m6.1,diagnostics=TRUE)
m6.2 <- ivreg(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year) | awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year), data=df);summary(m6.2,diagnostics=TRUE)

# second stage
stargazer(m6,m6.1,m6.2, m3,m3.1,m3.2,
          type="latex",
          title="IV Results: Second Stage (Pre-emptive Detentions)",
          label="Table: IV Results detentions",
          font.size="small",
          omit=c("year"),
          dep.var.labels = "Protests$_{t+1:t+7}$",
          add.lines=list(c("Year Fixed Effects","Yes","Yes","Yes","Yes","Yes","Yes")) ,
          order=c("wending","hexie","dwindow3","pwindow3","cwindow3","protestsexMinority.l1","logGRP","logPop","Population.rural.share",
                  "Sample.survey.sex.ratio","Urban.unemployment.rate","detentionsPre7","detentionsPre14","detentionsPre30"),
          covariate.labels = c("Stability",
                               "Harmony",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate",
                               "Detentions$_{t-1:t-7}$",
                               "Detentions$_{t-1:t-14}$",
                               "Detentions$_{t-1:t-30}$"
          )) 



m3 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year), data=df);summary(m3)
m3.1 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year), data=df);summary(m3.1)
m3.2 <- lm(hexie ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year), data=df);summary(m3.2)

m6 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre7 + as.factor(year), data=df);summary(m6)
m6.1 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre14 + as.factor(year), data=df);summary(m6.1)
m6.2 <- lm(wending ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + detentionsPre30 + as.factor(year), data=df);summary(m6.2)

# first stage 
stargazer(m6,m6.1,m6.2, m3,m3.1,m3.2,
          type="latex",
          title="IV Results: First Stage (Pre-emptive Detentions)",
          label="Table: First Stage detentions",
          font.size="small",
          omit=c("year"),
          dep.var.labels = c("``Stability''","``Harmony''"),
          add.lines=list(c("Year Fixed Effects","Yes","Yes","Yes","Yes","Yes","Yes")) ,
          order=c("awindow3","dwindow3","pwindow3","cwindow3","protestsexMinority.l1","logGRP","logPop","Population.rural.share",
                  "Sample.survey.sex.ratio","Urban.unemployment.rate","detentionsPre7","detentionsPre14","detentionsPre30"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate",
                               "Detentions$_{t-1:t-7}$",
                               "Detentions$_{t-1:t-14}$",
                               "Detentions$_{t-1:t-30}$"
          )) 


########################################################################################
# Table 25: Separatist Anniversaries and Deferred Protests (+/- 3 Day Donut)           #
########################################################################################

# create donut var
df$awindow3donut = 0
n = 3
for(i in 6:(nrow(df)-6)){
  if(1 %in% df$awindow3[(i+1):(i+3)] & df$awindow3[i]!=1){
    df$awindow3donut[i] = 1
  }
  if(1 %in% df$awindow3[(i-1):(i-3)] & df$awindow3[i]!=1){
    df$awindow3donut[i] = 1
  }
}

# check
head(df[c("awindow3","awindow3donut")],100)

m1 <- lm(protestsexMinoritySumPost7 ~ social.stability + protestsexMinority.l1 + awindow3 + awindow3donut + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m1)
m2 <- lm(protestsexMinoritySumPost7 ~ threats.topic + protestsexMinority.l1 + awindow3 + awindow3donut + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m2)
m3 <- lm(protestsexMinoritySumPost7 ~ wending + protestsexMinority.l1 + awindow3 + awindow3donut + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)
m4 <- lm(protestsexMinoritySumPost7 ~ hexie + protestsexMinority.l1 + awindow3 + awindow3donut + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m4)

stargazer(m1,m2,m3,m4,type="latex",
          title="Separatist Anniversaries and Deferred Protests (+/-3 Day Donut)",
          label="Table: Donut",
          font.size="small",
          omit=c("year"),
          add.lines=list(c("Year Fixed Effects","Yes","Yes","Yes","Yes")),
          dep.var.labels = "Protests$_{t+1:t+7}$",
          covariate.labels = c("Social Stability",
                               "Law Enforcement",
                               "`Stability'",
                               "`Harmony'",
                               "Protests$_{t-1}$",
                               "Separatist Anniversary",
                               "Separatist Anniversary Donut",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

#########################################################
# Table 26: Intent to Treat Models                      #
#########################################################

df = read.csv(paste(datadir,'df_nation_day_rr_3.csv',sep=""));df$X=NULL;df$date=as.Date(df$date);names(df)
df$logGRP = log(df$GRP)
df$logPop = log(df$Population.resident.year.end)
df$CPI = df$CPI-100
df$Pension.shortfall = df$Pension.shortfall/1000000 # in millions

m1 <- lm(protestsexMinoritySumPost7 ~ awindow3, data=df);summary(m1)
m2 <- lm(protestsexMinoritySumPost7 ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m2)
m3 <- lm(protestsexMinoritySumPost7 ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m3)

m4 <- lm(protestsexMinoritySumPost7 ~ awindow3, data=df);summary(m4)
m5 <- lm(protestsexMinoritySumPost7 ~ awindow3 + protestsexMinority.l1 + as.factor(year), data=df);summary(m5)
m6 <- lm(protestsexMinoritySumPost7 ~ awindow3 + protestsexMinority.l1 + dwindow3 + pwindow3 + cwindow3 + logGRP + logPop + Population.rural.share + Sample.survey.sex.ratio + Urban.unemployment.rate + as.factor(year), data=df);summary(m6)

stargazer(m4,m5,m6,m1,m2,m3,
          type="latex",
          title="Intent to Treat Models",
          label="Table: ITT Results",
          font.size="small",
          omit=c("year"),
          dep.var.labels = "Protests$_{t+1:t+7}$",
          add.lines=list(c("Year Fixed Effects","No","Yes","Yes","No","Yes","Yes")) ,
          order=c("awindow","dwindow3","pwindow3","cwindow3","protestsexMinority.l1"),
          covariate.labels = c("Separatist Anniversary",
                               "Democratic Anniversary",
                               "Political Anniversary",
                               "Cultural Anniversary",
                               "Protests$_{t-1}$",
                               "Log GRP",
                               "Log Population",
                               "Rural Population Share",
                               "Sex Ratio",
                               "Urban Unemployment Rate")) 

#########################################################
# Appendix Table 27: Survey Balance                     #
#########################################################

df = read.csv(paste(datadir,'Survey1000ResponsesFormatted.csv',sep=""),header=T,stringsAsFactors=F);names(df)
df = df[3:nrow(df),]
df$Duration..in.seconds. = as.integer(as.vector(df$Duration..in.seconds.))
colnames(df)[colnames(df)=="Q33"] = "tibetRebellion"

table(df$sex)
705/(705+606)
606/(705+606)

df$age = 2018-as.numeric(as.vector(df$yob))
df$age1824 = 0
df$age1824[df$age<=24] = 1
df$age2534 = 0
df$age2534[df$age>=25 & df$age<=34] = 1
df$age3544 = 0
df$age3544[df$age>=35 & df$age<=44] = 1
df$age4554 = 0
df$age4554[df$age>=45 & df$age<=54] = 1
df$age5564 = 0
df$age5564[df$age>=55 & df$age<=64] = 1
df$age6574 = 0
df$age6574[df$age>=65 & df$age<=74] = 1
summary(df[c("age1824","age2534","age3544","age4554","age5564","age6574")])

table(df$income)
143/nrow(df)
212/nrow(df)
259/nrow(df)
306/nrow(df)
187/nrow(df)
70/nrow(df)

p = as.data.frame(table(df$region))
p$p = p$Freq/nrow(df)
p = p[c("Var1","p")]
p


#########################################################################
# Figure 9: No Evidence of Clustering Around Correct Anniversary Dates  #
#########################################################################

cutoff = 300 # median, 5 mins
df = df[df$Duration..in.seconds.<=cutoff,]
nrow(df) 

# standardize date format, turning both DKs and NAs into 'NA'
df$tibetRebellion = as.Date(df$tibetRebellion,"%m.%d",origin="1970-01-01")
df$tibetLiberation = as.Date(df$tibetLiberation,"%m.%d",origin="1970-01-01")
df$serfDay = as.Date(df$serfDay,"%m.%d",origin="1970-01-01")
df$urumqi = as.Date(df$urumqi,"%m.%d",origin="1970-01-01")
df$lama = as.Date(df$lama,"%m.%d",origin="1970-01-01")

df[c("tibetRebellionOff","tibetLiberationOff","serfDayOff","urumqiOff","lamaOff")] = NA

# automate 
mydates = c(as.Date("2020-03-10"),as.Date("2020-05-23"),as.Date("2020-03-28"),as.Date("2020-07-05"),as.Date("2020-07-06"))
myvars = c("tibetRebellion","tibetLiberation","serfDay","urumqi","lama")
offvars = c("tibetRebellionOff","tibetLiberationOff","serfDayOff","urumqiOff","lamaOff") # to count days off
dkvars = c("tibetRebellionDK","tibetLiberationDK","serfDayDK","urumqiDK","lamaDK") # to count DK responses

for(j in seq(myvars)){
  var = myvars[j]
  date = mydates[j]
  off = offvars[j]
  dk = dkvars[j]
  df[,off] = NA
  df[,dk] = NA
  print(paste(var,as.character(date),sep=": "))
  for(i in 1:nrow(df)){
    # if respondent guessed a date, count how far off it was
    if(is.na(df[i,var])==F){
      df[i,off] = abs(df[i,var] - date)
    }
    # if respondent did not guess a date, assign 1 in DK column
    if(is.na(df[i,var])==T){ 
      df[i,dk] = 1
    }
  }
}


par(mar=c(5, 8.5, 4, 0) + 0.1,xpd=TRUE)
j = 3 # jitter factor
k = 320 # where to plot DKs
df$x = 5
mycol = "coral3"
plot(as.Date(df$tibetLiberation), jitter(df$x,j),col=mycol,bty="n",ylim=c(0,6),ylab=NA,yaxt="n",xlab=NA)
mycol = "navy"
points(df$tibetRebellion, jitter(df$x,j)-1,col=mycol)
mycol = "forestgreen"
points(df$serfDay, jitter(df$x,j)-2,col=mycol)
mycol = "purple"
points(df$urumqi, jitter(df$x,j)-3,col=mycol)
mycol = "orange"
points(df$lama, jitter(df$x,j)-4,col=mycol)
myy=c("Tibetan Liberation   ","Tibetan Rebellion   ","Serf Day   ","Xinjiang Uprising   ","Dalai Lama Birthday   ")
mtext(at=c(5,4,3,2,1),myy, side=2, las=2)# line=1,srt=45,adj=0,xpd=T)

segments(mydates[2],5.5,mydates[2],4.5,lwd=4)
segments(mydates[1],4.5,mydates[1],3.5,lwd=4)
segments(mydates[3],3.5,mydates[3],2.5,lwd=4)
segments(mydates[4],2.5,mydates[4],1.5,lwd=4)
segments(mydates[5],1.5,mydates[5],0.5,lwd=4)

filename = "SurveyAccuracy6.pdf" 
#dev.print(paste(figdir,filename,sep=""),device=pdf)



